import json
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

from const import origin_url
from const import target_url

# Seconds to pause after finishing each category page.
# NOTE(review): presumably a politeness/throttling delay -- confirm the value.
CATEGORY_DELAY_SECONDS = 2


def _collect_categories(driver):
    """Return a ``{"name", "link"}`` dict for each top-level category anchor.

    Reads the page currently loaded in *driver*. Both values are extracted
    immediately, so the result stays usable after the driver navigates away.
    """
    anchors = driver.find_elements(By.XPATH, "//a[@class='hei14b']")
    return [
        {
            # Drop every whitespace character, including those inside the label.
            "name": ''.join(anchor.text.split()),
            "link": anchor.get_attribute("href"),
        }
        for anchor in anchors
    ]


def _collect_subject_links(driver, categories):
    """Visit each category page and return ``(category, title, link)`` tuples.

    *categories* is the list produced by ``_collect_categories``. The title is
    the anchor's raw ``innerHTML``, so any nested markup is kept as-is.
    """
    collected = []
    for category in categories:
        driver.get(category['link'])

        # Subject anchors on the category page.
        for anchor in driver.find_elements(By.XPATH, "//a[@class='hei12']"):
            title = anchor.get_attribute("innerHTML")
            link = anchor.get_attribute("href")
            collected.append((category['name'], title, link))

        time.sleep(CATEGORY_DELAY_SECONDS)
    return collected


driver = webdriver.Chrome()

try:
    # Inject a script into every new document that makes
    # ``navigator.webdriver`` read as undefined.
    driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        "source": """
    Object.defineProperty(navigator, 'webdriver', {
      get: () => undefined
    })
  """
    })

    # Load the origin page before the target page.
    # NOTE(review): presumably needed to establish session state -- confirm.
    driver.get(origin_url)
    driver.get(target_url)

    # Top-level categories, then every subject link under each of them.
    cata_list = _collect_categories(driver)
    links = _collect_subject_links(driver, cata_list)
finally:
    # quit() ends the whole WebDriver session (close() would only close the
    # current window), and running it in ``finally`` releases the browser
    # even when scraping fails part-way.
    driver.quit()

# Only reached when scraping succeeded; tuples are serialized as JSON arrays.
with open("links.json", "w", encoding='utf-8') as f:
    json.dump(links, f, ensure_ascii=False)
